{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Survival Analysis\n",
    "\n",
    "- The first function draws the Survival Curve (Kaplan-Meier curve).\n",
    "- The second function implements the logrank test, comparing two survival curves.\n",
    "\n",
    "The formulas and the example are taken from Altman, Chapter 13\n",
    "\n",
    "Author : Thomas Haslwanter, Date : Feb-2017"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy import stats\n",
    "from urllib.request import urlopen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [],
   "source": [
    "def kaplanmeier(data):\n",
    "    '''Determine and the Kaplan-Meier curve for the given data.\n",
    "    Censored times are indicated with \"1\" in the second column, uncensored with \"0\"'''\n",
    "    times = data[:,0]\n",
    "    censored = data[:,1]\n",
    "    atRisk = np.arange(len(times),0,-1)\n",
    "    \n",
    "    failures = times[censored==0]\n",
    "    num_failures = len(failures)\n",
    "    p = np.ones(num_failures+1)\n",
    "    r = np.zeros(num_failures+1)\n",
    "    se = np.zeros(num_failures+1)\n",
    "    \n",
    "    # Calculate the numbers-at-risk, the survival probability, and the standard error\n",
    "    for ii in range(num_failures):\n",
    "        if failures[ii] == failures[ii-1]:\n",
    "            r[ii+1] = r[ii]\n",
    "            p[ii+1] = p[ii]\n",
    "            se[ii+1] = se[ii]\n",
    "            \n",
    "        else:\n",
    "            r[ii+1] = np.max(atRisk[times==failures[ii]])\n",
    "            p[ii+1] = p[ii] * (r[ii+1] - sum(failures==failures[ii]))/r[ii+1]\n",
    "            se[ii+1] = p[ii+1]*np.sqrt((1-p[ii+1])/r[ii+1])\n",
    "            # confidence intervals could be calculated as ci = p +/- 1.96 se\n",
    "    \n",
    "    # Plot survival curve (Kaplan-Meier curve)\n",
    "    # Always start at t=0 and p=1, and make a line until the last measurement\n",
    "    t = np.hstack((0, failures, np.max(times)))\n",
    "    sp = np.hstack((p, p[-1]))\n",
    "    \n",
    "    return(p,atRisk,t,sp,se)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false,
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.text.Text at 0x206241de240>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGqJJREFUeJzt3XmYFfWd7/H3hxZtRZS44EK7YIKj4NJiq2h04jZXjIlc\nozMjOKI33sv4GLJoxrlksol5fGImGozRLCQ6IE+MS5IZGUevOi7RG8eliUQDuBDE2EouBBNxIyx+\n7x9VXR6b7tPVS53q0/15Pc95zqmq36n6loXn2/Wr36KIwMzMDGBY2QGYmdnA4aRgZmYZJwUzM8s4\nKZiZWcZJwczMMk4KZmaWKSwpSLpR0mpJv+liuyRdK2m5pKclTSwqFjMzy6fIO4V5wOQq208FxqWv\nGcD3CozFzMxyKCwpRMTDwGtVikwBborEY8AoSXsUFY+ZmXVvqxKPPQZ4uWK5LV23qmNBSTNI7iYY\nMWLE4QcccECPD/bW8hcZtv6dHn1nmw1/5s9bb8Oq3fbp8fHq3TsbN7Pt8Ab223VE2aGYWT9YtGjR\nHyJi1+7KlZkUcouIucBcgJaWlmhtba3NgY8/Pnl/6KHaHG8A+dsf/BcAt/790SVHYmb9QdJLecqV\n2froFWCviuWmdJ2ZmZWkzKSwEJietkKaBLweEVtUHZmZWe0UVn0k6SfA8cAuktqArwLDASLi+8Bd\nwEeB5cDbwP8oKhYzM8unsKQQEVO72R7Ap4o6vpkNPRs3bqStrY3169eXHUppGhsbaWpqYvjw4b36\nfl08aDYzy6OtrY2RI0ey7777IqnscGouIli7di1tbW2MHTu2V/twUujO4sXvtULqqWnTYMaMfg3H\nzLq2fv36IZsQACSx8847s2bNml7vw0mhmmnTev/dxYuTdycFs5oaqgmhXV/P30mhmhkzev+j3tu7\nCzOzEjkpWJeWrlqXdWKrd1OaxzDtqL3LDsOGgIaGBg4++GA2btzIVlttxfTp07n44osZNqzrHgAr\nV67k0UcfZVqO2onJkyfz2GOPceyxx3LnnXf2Z+iAh862LkxpHsP4PXYoO4x+sXTVOu5Y7H6RVhvb\nbrstixcvZsmSJdx3333cfffdzJ49u+p3Vq5cyc0335xr/5deeikLFizoj1A75TsF69S0o/YeNH9Z\nD5a7Has/o0ePZu7cuRxxxBFcdtllvPTSS5x77rm89dZbAFx33XUcc8wxzJo1i2XLltHc3Mx5553H\nGWec0Wk5gJNOOomHChx6x0nBzAal2f++hKWvruvXfY7fcwe++vEJPfrOfvvtx+bNm1m9ejWjR4/m\nvvvuo7GxkRdeeIGpU6fS2trKlVdeyVVXXZVVB7399tudlqsFJwUzsxrZuHEjM2fOZPHixTQ0NPD8\n88/3qVwRnBTMbFDq6V/0RVmxYgUNDQ2MHj2a2bNns9tuu/HrX/+ad999l8bGxk6/M2fOnFzliuAH\nzWZmBVmzZg0XXnghM2fORBKvv/46e+yxB8OGDWPBggVs3rwZgJEjR/LGG29k3+uqXC34TsHMrB+9\n8847NDc3Z01Szz33XC655BIALrroIs4880xuv/12TjjhBEaMSCaxOuSQQ2hoaODQQw/l/PPP77Ic\nwHHHHcezzz7Lm2++SVNTEzfccAOnnHJKv8XvpGBm1o+q/VU/btw4nn766Wz561//OgDDhw/ngQce\neF/ZzsoBPPLII/0VaqdcfWRmZhnfKdiQUNk7272bzbrmpGCD3pTmMdnnpauSdutOCmadc1KwQa+y\nd7Z7N5tV56RQpM7mYvAcC2Y2gDkpFKWz0Q49x4KZDXBufVSUGTPgoYfe/2puLjcmMytcQ0MDzc3N\nTJgwgUMPPZSrr76ad999t+p38o6SunjxYo4++mgmTJjAIYccwq233tpfYWd8p2Bm1o/ah84GWL16\nNdOmTWPdunVVh89uTwrdzaew3XbbcdNNNzFu3DheffVVDj/8cE455RRGjRrVb/H7TsHMrCDtQ2df\nd911RAQrV67kuOOOY+LEiUycOJFHH30UgFmzZvHII4/Q3NzMnDlzuiy3//77M27cOAD23HNPRo8e\n3af5mDvjOwUzG5w+97n3nuP1l+ZmuOaaHn2lqKGzn3jiCTZs2MAHP/jBfjs9cFIwM6uZ/ho6e9Wq\nVZx77rnMnz+/6jSfveGkYGaDUw//oi9Kfw+dvW7dOk477TSuuOIKJk2a1O/xOinUWmd9F4rkfhFb\nqBzywvLz8CA919nQ2U1NTQwbNoz58+dXHTq7s3IbNmzgjDPOYPr06Zx11lmFxOykUEvdtCzod+4X\nsYXKIS8sPw8Pkl+RQ2ffdtttPPzww6xdu5Z58+YBMG/ePJr7sbm7IqLfdlYLLS0tUau5Sute+x1J\ngZN829DQfmd1698fXXIk1S1btowDDzyw7DBK19l/B0mLIqKlu++6SaqZmWWcFMzMLOOkYGaDSr1V\nife3vp6/k4KZDRqNjY2sXbt2yCaGiGDt2rVdNnXNw62PzGzQaGpqoq2trd+HfqgnjY2NNDU19fr7\nTgqDXa37RVjX3GekcMOHD2fs2LFlh1HXnBQGs1r3i7Cuuc+I1YlCk4KkycC3gQbgRxFxZYftewPz\ngVFpmVkRcVeRMQ0pM2b4R2ig8N2a1YnCHjRLagCuB04FxgNTJY3vUOxLwG0RcRhwNvDdouIxM7Pu\nFXmncCSwPCJWAEi6BZgCLK0oE8AO6ecdgVcLjMfM+qBWY0Z5jKVyFZkUxgAvVyy3AUd1KHMZcK+k\nTwMjgJM725GkGcAMgL339j8Ws1qr1ZhRHmOpfGU/aJ4KzIuIqyUdDSyQdFBEvG9C04iYC8yFZOyj\nEuI0G9KmHbV3TX6oPXpt+YrsvPYKsFfFclO6rtIFwG0AEfFfQCOwS4ExmZlZFUXeKTwJjJM0liQZ\nnA10bCP5O+AkYJ6kA0mSwtDtdWKDW2d9Rtx3wQaYwpJCRGySNBO4h6S56Y0RsUTS5UBrRCwEPg/8\nUNLFJA+dz4+h2j/dBrfO+oy474INQIU+U0j7HNzVYd1XKj4vBT5cZAxmA0JnfUbcd8EGIA+IZ2Zm\nGScFMzPLOCmYmVnGScHMzDJld14zM3ufWg2nUY/G77kDX/34hEKP4aRgZgNGrYbTsK45KZiVKe8k\nSEOkk1uthtOwrjkpmJUl7yRI7uRmNeSkYFaWvJMguZOb1ZBbH5mZWcZJwczMMk4KZmaW6TYpSDq4\nFoGYmVn58twpfFfSE5IukrRj4RGZmVlpuk0KEXEccA7JLGqLJN0s6a8Kj8zMzGou1zOFiHgB+BLw\nv4GPANdKelbSJ4oMzszMaivPM4VDJM0BlgEnAh+PiAPTz3MKjs/MzGooT+e17wA/Av4pIt5pXxkR\nr0r6UmGRmZlZzeWpPvrXiFhQmRAkfRYgIhYUFpmZmdVcnqQwvZN15/dzHGZmNgB0WX0kaSowDRgr\naWHFppHAa0UHZmZmtVftmcKjwCpgF+DqivVvAE8XGZSZmZWjy6QQES8BLwFH1y4cM+tU3nkX+mqI\nzNtgXatWffR/I+JYSW8AUbkJiIjYofDozCz/vAt95XkbjOp3Csem7yNrF46ZbSHvvAt95XkbjOp3\nCjtV+2JE+GGzmdkgU+1B8yKSaiN1si2A/QqJyMzMSlOt+mhsLQMxM7PyVas+OiAinpU0sbPtEfGr\n4sIyM7MyVKs+ugSYwfv7KLQLkgHxzMxsEKlWfTQjfT+hduGYWamq9YdwH4YhodtRUiU1AhcBx5Lc\nITwCfD8i1hccm5nVUrX+EO7DMGTkGTr7JpKhLb6TLk8DFgB/XVRQZlaCav0h3IdhyMiTFP4iIg6t\nWH5Q0q+LCsjMzMqTZ+jspyRNal+QdBTwyzw7lzRZ0nOSlkua1UWZv5G0VNISSTfnC9vMzIpQrUnq\nMyTPEIYD0yX9Ll3eh2RqzqokNQDXA38FtAFPSloYEUsryowDvgB8OCL+KGl0X07GzMz6plr10cf6\nuO8jgeURsQJA0i3AFGBpRZn/BVwfEX8EiIjVfTymmZn1QZfVRxHxUuULeIfkTqH91Z0xwMsVy23p\nukr7A/tL+qWkxyRN7mxHkmZIapXUumbNmhyHNjOz3uj2mYKk0yW9ALwI/AJYCdzdT8ffChgHHA9M\nBX4oaVTHQhExNyJaIqJl11137adDm5lZR3keNH8NmAQ8n46HdBL5HjS/AuxVsdyUrqvUBiyMiI0R\n8SLwPEmSMDOzEuRJChsjYi0wTNKwiHgQaM7xvSeBcZLGStoaOBtY2KHMv5HcJSBpF5LqpBV5gzcz\ns/6Vp5/CnyRtT9KT+ceSVgObuvtSRGySNBO4B2gAboyIJZIuB1ojYmG67b9JWgpsBi5NE5CZmZVA\nEdWfGUsaAawnmVfhHGBH4Mdl/Xi3tLREa2trGYc2G7raezQ/9FCZUVgfSFoUES3dlev2TiEi3pK0\nO0kT09eAe/zXvJnZ4JSn9dH/BJ4APgGcBTwm6ZNFB2ZmZrWX55nCpcBh7XcHknYGHgVuLDIwMzOr\nvTxJoY1klNR2b/D+TmlmNhRUm2uh3nhuiC5VG/vokvTjK8Djku4g6ck8haQ6ycyGimpzLdQbzw1R\nVbU7hZHp+2/TV7s7igvHzAakanMt1JvBcrdTkGrTcc6uXE77KhARbxYdlJmZlSNP66ODJD0FLAGW\nSFokaULxoZmZWa3lGeZiLnBJROwTEfsAnwd+WGxYZmZWhjxJYUQ63hEAEfEQMKKwiMzMrDR5mqSu\nkPRlYEG6/Hd40Dozs0EpT1L4JDAb+DlJk9RH0nVmZvVpIPS5GKB9JaomhXSe5X+KiM/UKB4zs2IN\nhD4XA7ivRNWkEBGbJR1eq2DMzAo3EPpclH2XUkWe6qOnJC0Ebgfeal8ZET8vLCozMytFnqSwE7AW\nOLFiXZA8YzAzs0Ek1yipEfGHwiMxM7PSddlPQdLHJa0BnpbUJumYGsZlZmYlqNZ57QrguIjYEzgT\n+HptQjIzs7JUqz7aFBHPAkTE45JGVilrZmY90Zu+Es3NcM01hYTTrlpSGF0xp8IWyxHxreLCMjMb\nxAZCX4kuVEsKP+S9ORU6WzYzs94YCH0lupB7PgUzMxv88oySamZmQ4STgpmZZZwUzMws0+UzhQ4t\nj7bg1kdmZoNPtdZHbmlkZjbEuPWRmZlluh0QT1IjcAEwAWhsXx8Rnn3NzGyQyfOgeQGwO3AK8Aug\nCXijyKDMzKwceZLChyLiy8BbETEfOA04uNiwzMysDHmSwsb0/U+SDgJ2BPYtLCIzMytNnkl25kr6\nAPBlYCGwffrZzMwGmTxJ4V8iYjPJ84T9Co7HzMxKlKf66EVJcyWdJEk92bmkyZKek7Rc0qwq5c6U\nFJJaerJ/MzPrX3mSwgHAfwKfAlZKuk7Ssd19SVIDcD1wKjAemCppfCflRgKfBR7vSeBmZtb/uk0K\nEfF2RNwWEZ8AmoEdSKqSunMksDwiVkTEBuAWYEon5b4GfANYnz9sMzMrQq4B8SR9RNJ3gUUkHdj+\nJsfXxgAvVyy3pesq9zsR2Csi/qOb48+Q1Cqpdc2aNXlCNjOzXsjTo3kl8BRwG3BpRLzVHweWNAz4\nFnB+d2UjYi4wF6ClpSX64/hmZralPK2PDomIdb3Y9yvAXhXLTem6diOBg4CH0ufXuwMLJZ0eEa29\nOJ6ZmfVRtaGz/zEi/hm4QtIWf51HxGe62feTwDhJY0mSwdlANlt1RLwO7FJxvIeAf3BCMDMrT7U7\nhWXpe69+pCNik6SZwD1AA3BjRCyRdDnQGhELe7NfMzMrTrWhs/89/fhMRPyqNzuPiLuAuzqs+0oX\nZY/vzTHMzKz/5Gl9dLWkZZK+lo59ZGZmg1SefgonACcAa4AfSHpG0pcKj8zMzGouVz+FiPh9RFwL\nXAgsBjqtAjIzs/rWbVKQdKCkyyQ9A3wHeJSkeamZmQ0yefop3EgyRMUpEfFqwfGYmVmJqiaFdFC7\nFRHx7RrFY2ZmJapafZTOo7CzpK1rFI+ZmZUoT/XRS8AvJS0EsnGPIuJbhUVlZmalyJMUXk1fw0jG\nKzIzs0Gq26QQEbNrEYiZmZUvz9DZDwKdDYh3YiERmZlZafJUH/1DxedG4ExgUzHhmJlZmfJUHy3q\nsOqXkvJMx2lmZnUmT/XRThWLw4DDSSbEMTOzQSZP9dEikmcKIqk2ehG4oMigzMysHHmqj8bWIhAz\nMytflz2aJR0hafeK5emS7pB0bYcqJTMzGySqDXPxA2ADgKS/BK4EbgJeB+YWH5qZmdVateqjhoh4\nLf38t8DciPgZ8DNJi4sPzczMaq3anUKDpPakcRLwQMW2PA+ozcyszlT7cf8J8AtJfwDeAR4BkPQh\nkiokMzMbZLpMChFxhaT7gT2AeyOifaiLYcCnaxGcmZnVVtVqoIh4rJN1zxcXjpmZlanbOZrNzGzo\ncFIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllCk0K\nkiZLek7SckmzOtl+iaSlkp6WdL+kfYqMx8zMqissKUhqAK4HTgXGA1Mlje9Q7CmgJSIOAX4K/HNR\n8ZiZWfeKvFM4ElgeESsiYgNwCzClskBEPBgRb6eLjwFNBcZjZmbdKDIpjAFerlhuS9d15QLg7s42\nSJohqVVS65o1a/oxRDMzqzQgHjRL+jugBfhmZ9sjYm5EtEREy6677lrb4MzMhpAi51p+BdirYrkp\nXfc+kk4Gvgh8JCL+XGA8ZmbWjSLvFJ4ExkkaK2lr4GxgYWUBSYcBPwBOj4jVBcZiZmY5FJYUImIT\nMBO4B1gG3BYRSyRdLun0tNg3ge2B2yUtlrSwi92ZmVkNFFl9RETcBdzVYd1XKj6fXOTxzcysZwbE\ng2YzMxsYnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46Rg\nZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnG\nScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAz\ns4yTgpmZZZwUzMws46RgZmaZQpOCpMmSnpO0XNKsTrZvI+nWdPvjkvYtMh4zM6uusKQgqQG4HjgV\nGA9MlTS+Q7ELgD9GxIeAOcA3iorHzMy6V+SdwpHA8ohYEREbgFuAKR3KTAHmp59/CpwkSQXGZGZm\nVWxV4L7HAC9XLLcBR3VVJiI2SXod2Bn4Q2UhSTOAGenim5Ke62VMu3Tcdx3zuQw8g+U8wOcyUPXl\nXPbJU6jIpNBvImIuMLev+5HUGhEt/RBS6XwuA89gOQ/wuQxUtTiXIquPXgH2qlhuStd1WkbSVsCO\nwNoCYzIzsyqKTApPAuMkjZW0NXA2sLBDmYXAeenns4AHIiIKjMnMzKoorPoofUYwE7gHaABujIgl\nki4HWiNiIXADsEDScuA1ksRRpD5XQQ0gPpeBZ7CcB/hcBqrCz0X+w9zMzNq5R7OZmWWcFMzMLDNk\nkkJ3Q24MVJL2kvSgpKWSlkj6bLp+J0n3SXohff9A2bHmJalB0lOS7kyXx6bDnCxPhz3ZuuwY85A0\nStJPJT0raZmko+v1uki6OP339RtJP5HUWC/XRdKNklZL+k3Fuk6vgxLXpuf0tKSJ5UX+fl2cxzfT\nf19PS/pXSaMqtn0hPY/nJJ3SX3EMiaSQc8iNgWoT8PmIGA9MAj6Vxj4LuD8ixgH3p8v14rPAsorl\nbwBz0uFO/kgy/Ek9+DbwfyLiAOBQknOqu+siaQzwGaAlIg4iaRhyNvVzXeYBkzus6+o6nAqMS18z\ngO/VKMY85rHledwHHBQRhwDPA18ASH8DzgYmpN/5bvo712dDIimQb8iNASkiVkXEr9LPb5D88Izh\n/UOEzAf+ezkR9oykJuA04EfpsoATSYY5gTo5F0k7An9J0oKOiNgQEX+iTq8LSUvEbdP+QtsBq6iT\n6xIRD5O0XqzU1XWYAtwUiceAUZL2qE2k1XV2HhFxb0RsShcfI+nvBcl53BIRf46IF4HlJL9zfTZU\nkkJnQ26MKSmWXktHkT0MeBzYLSJWpZt+D+xWUlg9dQ3wj8C76fLOwJ8q/uHXy7UZC6wB/iWtCvuR\npBHU4XWJiFeAq4DfkSSD14FF1Od1adfVdajn34JPAnennws7j6GSFOqepO2BnwGfi4h1ldvSDn8D\nvm2xpI8BqyNiUdmx9IOtgInA9yLiMOAtOlQV1dF1+QDJX55jgT2BEWxZjVG36uU6VCPpiyRVyT8u\n+lhDJSnkGXJjwJI0nCQh/Dgifp6u/n/tt73p++qy4uuBDwOnS1pJUoV3Ikm9/Ki02gLq59q0AW0R\n8Xi6/FOSJFGP1+Vk4MWIWBMRG4Gfk1yrerwu7bq6DnX3WyDpfOBjwDkVIz4Udh5DJSnkGXJjQErr\n3G8AlkXEtyo2VQ4Rch5wR61j66mI+EJENEXEviTX4IGIOAd4kGSYE6ifc/k98LKkv0hXnQQspQ6v\nC0m10SRJ26X/3trPpe6uS4WursNCYHraCmkS8HpFNdOAI2kySXXr6RHxdsWmhcDZSiYqG0vy4PyJ\nfjloRAyJF/BRkqf3vwW+WHY8PYj7WJJb36eBxenroyR18fcDLwD/CexUdqw9PK/jgTvTz/ul/6CX\nA7cD25QdX85zaAZa02vzb8AH6vW6ALOBZ4HfAAuAberlugA/IXkWspHkDu6Crq4DIJKWiL8FniFp\ncVX6OVQ5j+Ukzw7a/9//fkX5L6bn8Rxwan/F4WEuzMwsM1Sqj8zMLAcnBTMzyzgpmJlZxknBzMwy\nTgpmZpYpbOY1s3onqb1ZI8DuwGaSoS0A3o6IY0oJzKxAbpJqloOky4A3I+KqsmMxK5Krj8x6QdKb\n6fvxkn4h6TZJz0u6UtI5kp6Q9IykD6bldpX0M0lPpq8Pl3sGZp1zUjDru0NJ5og4GDgX2D8ijiQZ\nHvzTaZlvk8xNcARwZrrNbMDxMwWzvnsy0vFzJP0WuDdd/wxwQvr5ZGB8MrQQADtI2j4i3qxppGbd\ncFIw67s/V3x+t2L5Xd77f2wYMCki1tcyMLOecvWRWW3cy3tVSUhqLjEWsy45KZjVxmeAlnQC9qXA\nhWUHZNYZN0k1M7OM7xTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs8z/B/z7J69E\nYs0BAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x20623e6deb8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Get the data\n",
    "inFile1 = 'altman_13_2.txt'\n",
    "inFile2 = 'altman_13_3.txt'\n",
    "url_base = 'https://raw.githubusercontent.com/thomas-haslwanter/statsintro_python/master/ipynb/Data/data_altman/'\n",
    "url1 = url_base + inFile1\n",
    "url2 = url_base + inFile2\n",
    "data_1 = np.genfromtxt(urlopen(url1), delimiter=',')\n",
    "data_2 = np.genfromtxt(urlopen(url2), delimiter=',')\n",
    "\n",
    "# Determine the Kaplan-Meier curves\n",
    "(p1, r1, t1, sp1,se1) = kaplanmeier(data_1)\n",
    "(p2, r2, t2, sp2,se2) = kaplanmeier(data_2)\n",
    "\n",
    "# Make a combined plot for both datasets\n",
    "plt.step(t1,sp1, where='post')\n",
    "plt.step(t2,sp2,'r', where='post')\n",
    "\n",
    "plt.legend(['Data1', 'Data2'])\n",
    "plt.ylim(0,1)\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Survival Probability')\n",
    "#plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "slideshow": {
     "slide_type": "subslide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X^2 = 3.207\n",
      "p=0.0733, the two survival curves are not signifcantly different.\n"
     ]
    }
   ],
   "source": [
    "'''Logrank hypothesis test, comparing the survival times for two different datasets'''\n",
    "\n",
    "times_1 = data_1[:,0]\n",
    "censored_1 = data_1[:,1]\n",
    "atRisk_1 = np.arange(len(times_1),0,-1)\n",
    "failures_1 = times_1[censored_1==0]\n",
    "\n",
    "times_2 = data_2[:,0]\n",
    "censored_2 = data_2[:,1]\n",
    "atRisk_2 = np.arange(len(times_2),0,-1)\n",
    "failures_2 = times_2[censored_2==0]\n",
    "\n",
    "failures = np.unique(np.hstack((times_1[censored_1==0], times_2[censored_2==0])))\n",
    "num_failures = len(failures)\n",
    "r1 = np.zeros(num_failures)\n",
    "r2 = np.zeros(num_failures)\n",
    "r  = np.zeros(num_failures)\n",
    "f1 = np.zeros(num_failures)\n",
    "f2 = np.zeros(num_failures)\n",
    "f  = np.zeros(num_failures)\n",
    "e1 = np.zeros(num_failures)\n",
    "f1me1 = np.zeros(num_failures)\n",
    "v = np.zeros(num_failures)\n",
    "\n",
    "for ii in range(num_failures):\n",
    "    r1[ii] = np.sum(times_1 >= failures[ii])\n",
    "    r2[ii] = np.sum(times_2 >= failures[ii])\n",
    "    r[ii] = r1[ii] + r2[ii]\n",
    "    \n",
    "    f1[ii] = np.sum(failures_1==failures[ii])\n",
    "    f2[ii] = np.sum(failures_2==failures[ii])\n",
    "    f[ii] = f1[ii] + f2[ii]\n",
    "    \n",
    "    e1[ii] = r1[ii]*f[ii]/r[ii]\n",
    "    f1me1[ii] = f1[ii] - e1[ii]\n",
    "    v[ii] = r1[ii]*r2[ii]*f[ii]*(r[ii]-f[ii]) / ( r[ii]**2 *(r[ii]-1) )\n",
    "\n",
    "    O1 = np.sum(f1)\n",
    "    O2 = np.sum(f2)\n",
    "    E1 = np.sum(e1)\n",
    "    O1mE1 = np.sum(f1me1)\n",
    "    V = sum(v)\n",
    "    \n",
    "chi2 = (O1-E1)**2/V\n",
    "p = stats.chi2.sf(chi2, 1)\n",
    "\n",
    "print('X^2 = {0:5.3f}'.format(chi2))\n",
    "if p < 0.05:\n",
    "    print('p={0:6.4f}, the two survival curves are signifcantly different.'.format(p))\n",
    "else:\n",
    "    print('p={0:6.4f}, the two survival curves are not signifcantly different.'.format(p))"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
